# Restore the objects saved by an earlier step of the pipeline.
# NOTE(review): `blog_post` is assumed to be one of the objects restored here,
# with a character `text` column -- confirm against the script that wrote the
# .RData file.
load("02_01_m5s_blog_post_lda.RData")

# library() stops with an error when stringr is not installed; require() would
# only return FALSE and let the script fail later at the first str_count().
library(stringr)

# Word-initial stem "cittadin" (e.g. "cittadino", "cittadini", "cittadinanza").
# Matching is case-sensitive in both grepl() and str_count().
pat <- "\\bcittadin"

# Per-post indicators:
#   citizen_bool  - TRUE when the stem occurs at least once in the post
#   citizen_count - number of occurrences of the stem in the post
#   word_count    - number of word tokens (runs of "\\w" characters)
blog_post$citizen_bool <- grepl(pat, blog_post$text)
blog_post$citizen_count <- str_count(blog_post$text, pat)
blog_post$word_count <- str_count(blog_post$text, "\\w+")

# library() fails loudly if dplyr is missing (require() only returns FALSE).
library(dplyr)

# Aggregate the per-post indicators by calendar month.
# Each month is keyed by the ISO date string of its 15th day so that the key
# can be turned back into a Date with as.Date() when plotting. The day is
# written as a literal "15": "%15" is not a defined conversion specification,
# so its output depends on the platform's strftime implementation.
monthly <-
  blog_post %>%
  dplyr::group_by(date = format(as.Date(date), "%Y-%m-15")) %>%
  dplyr::summarize(
    n_posts = n(),                    # posts published in the month
    n_words = sum(word_count),        # total word tokens in those posts
    citiz_posts = sum(citizen_bool),  # posts mentioning the stem at least once
    citiz_words = sum(citizen_count)  # total occurrences of the stem
  )
# library() fails loudly if zoo is missing (require() only returns FALSE).
library(zoo)

# 12-row centred rolling means of the monthly shares. `fill = NA` pads the
# ends so the result has one value per row of `monthly`.
# NOTE(review): this treats consecutive rows as consecutive months; a month
# with no posts has no row and would silently shorten the window -- confirm
# the data has no gaps.
monthly$rollmean_posts <- rollmean(
  monthly$citiz_posts / monthly$n_posts,
  k = 12,
  fill = NA,
  align = "center"
)
monthly$rollmean_words <- rollmean(
  monthly$citiz_words / monthly$n_words,
  k = 12,
  fill = NA,
  align = "center"
)

# library() fails loudly if a package is missing (require() only returns FALSE).
library(ggplot2)
library(scales)

# Monthly share of posts mentioning the stem (default-coloured line) together
# with its rolling mean (blue line). Three reference dates are marked with
# dashed vertical lines and labelled "(1)", "(2)", "(3)" at y = 0.55.
plot <-
  ggplot(monthly, aes(x = as.Date(date), y = citiz_posts / n_posts)) +
  geom_line() +
  geom_line(aes(y = rollmean_posts), colour = "blue") +
  # Spell out `labels`; `label` only worked through partial argument matching.
  scale_y_continuous(labels = scales::percent) +
  labs(x = NULL, y = "posts") +
  # One vectorised layer instead of three identical geom_vline() calls.
  # as.numeric() converts each Date to its day count, matching the date axis.
  geom_vline(
    xintercept = as.numeric(
      as.Date(c("2010-05-05", "2011-05-11", "2011-09-17"))
    ),
    linetype = "dashed",
    alpha = .5
  ) +
  geom_text(
    data = data.frame(
      x = as.Date(c("2010-05-05", "2011-05-11", "2011-09-17")),
      y = .55,
      label = c("(1)", "(2)", "(3)")
    ),
    aes(x = x, y = y, label = label)
  )


